This notebook initializes the directory structure of the `materials` and `tasks` documentation directories from example YAML (or JSON) files of Materials Project documents.
In [ ]:
import json
import os
import re
import shutil
import yaml
In [ ]:
# Per-collection settings: the root of the generated directory tree and the
# regex that picks that collection's example files out of the current directory.
CONFIG = dict(
    materials=dict(
        rootdir="../materials",
        fname_pattern=r"^material_.+\.yaml$",
    ),
    tasks=dict(
        rootdir="../tasks",
        fname_pattern=r"^task_.+\.yaml$",
    ),
)
In [ ]:
# Every directory and README path that make_dir() has created or confirmed.
desired = set()

# Markdown section placed at the end of each README; %s receives the
# pretty-printed JSON of the example value.
example = """## Example response in JSON
```json
%s
```
"""


def _readme_text(prose, value):
    """Return README content: ``prose`` followed by the JSON example of ``value``."""
    section = example % json.dumps(value, indent=4)
    return f"{prose}\n\n{section}" if prose else section


def make_dir(d, rootdir):
    """Mirror the keys of a document as directories, each holding a README.md.

    For every key of ``d`` except ``_id``, a directory named after the key is
    created under ``rootdir`` and its ``README.md`` is created or refreshed so
    that it ends with an "Example response in JSON" section showing the key's
    value. Nested dicts, and the first element of a list of dicts, are
    processed recursively. Each directory and README path is recorded in the
    module-level ``desired`` set.

    Args:
        d: Mapping of field names to example values.
        rootdir: Directory under which the per-key directories are created.

    Raises:
        OSError: If a directory or README cannot be created, read or written.
    """
    for k, v in d.items():
        if k == "_id":
            # Ignore object id key, which is useless.
            continue

        subpath = os.path.join(rootdir, k)
        # exist_ok tolerates reruns without hiding real failures such as
        # permission errors.
        os.makedirs(subpath, exist_ok=True)
        fname = os.path.join(subpath, "README.md")
        desired.add(subpath)
        desired.add(fname)

        if not os.path.exists(fname):
            with open(fname, "w", encoding="utf-8") as f:
                f.write("" if v is None else _readme_text("", v))
        elif v is not None:
            with open(fname, encoding="utf-8") as f:
                current = f.read()
            # Keep only the hand-written text before the example heading.
            # Trailing whitespace is dropped so that repeated runs do not pile
            # up blank lines in front of the regenerated section.
            prose = current.strip().split("## Example response in JSON")[0].rstrip()
            refreshed = _readme_text(prose, v)
            if refreshed != current:
                print(f"Adding example to {k}")
                with open(fname, "w", encoding="utf-8") as f:
                    f.write(refreshed)

        if isinstance(v, dict) and "formula" not in k:
            # Ignore formulas, which have material-specific keys.
            make_dir(v, rootdir=subpath)
        elif isinstance(v, list) and v and isinstance(v[0], dict):
            # Recurse into lists of dicts, using the first element as the sample.
            make_dir(v[0], rootdir=subpath)
In [ ]:
# Build the directory tree for each configured collection from the example
# documents found in the current directory.
for k, v in CONFIG.items():
    print(f"Generating for {k}...")
    pattern = re.compile(v["fname_pattern"])
    # Sorted so that runs process the example files in a reproducible order.
    for fname in sorted(os.listdir(".")):
        if not pattern.match(fname):
            continue
        with open(fname) as f:
            # safe_load builds only plain Python data; yaml.load() without an
            # explicit Loader can construct arbitrary objects and is rejected
            # by PyYAML 6+.
            doc = yaml.safe_load(f)
        if not isinstance(doc, dict):
            # An empty or non-mapping file has no keys to turn into directories.
            print(f"Skipping {fname}: top-level YAML value is not a mapping")
            continue
        make_dir(doc, rootdir=v["rootdir"])
In [ ]:
import glob
from tqdm import tqdm_notebook as tqdm
# Remove sandbox ("sbxn" / "sbxd") directories and files from each generated tree.
for k, v in CONFIG.items():
    print(f"Cleanup for {k}...")
    rootdir = v["rootdir"]
    sandbox_markers = (f"{rootdir}/sbxn", f"{rootdir}/sbxd")

    # Drop sandbox paths from the keep-set. A path is kept only when it matches
    # neither marker; joining the two negated tests with "or" would keep
    # everything.
    desired = {
        d for d in desired
        if not any(marker in d for marker in sandbox_markers)
    }

    # Select victims from what is actually on disk rather than from the
    # keep-set, so sandbox paths left by earlier runs are caught too.
    # NOTE(review): other stale paths under rootdir are not deleted here;
    # widen this filter if a full cleanup is wanted.
    existing = set(glob.glob(f"{rootdir}/**", recursive=True))
    paths_to_kill = {
        path for path in existing - desired - {f"{rootdir}/"}
        if any(marker in path for marker in sandbox_markers)
    }

    # Longest paths first: files and sub-directories are removed before their
    # parents, so the parents are empty when their turn comes.
    for path in tqdm(sorted(paths_to_kill, key=len, reverse=True)):
        try:
            os.remove(path)
        except OSError:
            # Not a regular file (or already gone): try it as a directory.
            try:
                os.removedirs(path)
            except OSError:
                # Best effort: non-empty or already-pruned directories are skipped.
                pass